A. Number
1) Frequency of tickets in the five boroughs (Manhattan, Brooklyn (Kings County), The Bronx, Staten Island, Queens)
# Load the open parking and camera violations export and standardise it:
#   * clean the column names and rename `county` to `borough`;
#   * collapse the various county codes into one label per borough;
#   * parse the issue date and derive weekday / year / month / day from it;
#   * drop rows with an unrecognised borough code, an unparseable date, or an
#     issue month of November or December.
violation <- read_csv("./Open_Parking_and_Camera_Violations.csv") %>%
  janitor::clean_names() %>%
  rename(borough = county) %>% # rename county to borough
  mutate(
    # Make the borough labels consistent. Codes that are not listed here
    # (e.g. "A") become NA and are removed by the filter below.
    # NOTE(review): "State Island" looks like a typo for "Staten Island"; the
    # label is kept as-is because other code may match on it -- confirm and
    # rename everywhere at once.
    borough = case_when(
      borough %in% c("BK", "K", "Kings") ~ "Brooklyn",
      borough %in% c("BX", "Bronx") ~ "Bronx",
      borough %in% c("Q", "QN", "Qns") ~ "Queens",
      borough %in% c("ST", "R", "Rich", "RICH") ~ "State Island",
      borough %in% c("NY", "MN") ~ "Manhattan",
      TRUE ~ NA_character_
    ),
    # mdy() accepts both two- and four-digit years. The previous
    # as.Date(format = "%m/%d/%y") read only the first two digits of a
    # four-digit year, turning e.g. 06/15/2021 into 2020-06-15.
    # Unparseable dates become NA (quiet = TRUE suppresses the warning).
    issue_date = mdy(issue_date, quiet = TRUE),
    weekday = weekdays(issue_date),
    year = year(issue_date),
    month = month(issue_date),
    day = day(issue_date)
  ) %>%
  filter(
    !is.na(borough), # get rid of unrecognised borough codes such as "A"
    !is.na(weekday), # remove rows whose date could not be parsed
    # NOTE(review): presumably the extract has no complete data for
    # November/December -- confirm why these months are excluded.
    !month %in% c(11, 12)
  )
## Rows: 1770806 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): Plate, State, License Type, Issue Date, Violation Time, Violation,...
## dbl (7): Summons Number, Fine Amount, Penalty Amount, Interest Amount, Redu...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Other potential cleaning steps: 1) select only the variables we need; 2) convert the month into a character?
# Interactive bar chart of the total number of tickets per borough.
# Boroughs are turned into a factor ordered by their ticket count so the
# bars are drawn in that order.
count(violation, borough) %>%
  mutate(borough = fct_reorder(borough, n)) %>%
  plot_ly(
    x = ~borough,
    y = ~n,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Frequency of Tickets in Boroughs in 2021",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Number of Tickets")
  )
# Interactive bar chart of the number of tickets per month, with one colour
# (trace) per borough.
# Counting by borough and month in a single count() call returns an ungrouped
# tibble, so the month factor below is built once for the whole table instead
# of separately inside each borough group.
violation %>%
  count(borough, month) %>%
  mutate(
    # Convert month numbers to abbreviations and fix their calendar order
    # (Jan..Oct) so the x axis is not sorted alphabetically.
    month = month.abb[as.numeric(month)],
    month = fct_relevel(month, month.abb[1:10])
  ) %>%
  plot_ly(
    x = ~month,
    y = ~n,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    # The x axis shows months, not boroughs; titles now describe the axes.
    title = "Monthly Frequency of Tickets by Borough in 2021",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Tickets")
  )
Frequency of violation type in each borough.
# Faceted bar chart of the ten most frequent violation types in each borough,
# rendered interactively with ggplotly().
violation_type <- violation %>%
  count(borough, violation) %>%
  # Rank violation types within each borough and keep the ten largest counts.
  group_by(borough) %>%
  arrange(desc(n)) %>%
  filter(row_number() <= 10) %>%
  ungroup() %>%
  # Reorder after ungrouping so every facet shares one consistent level order
  # (a grouped mutate would build the factor separately per borough).
  mutate(violation = fct_reorder(violation, n)) %>%
  ggplot(aes(x = violation, y = n, fill = violation)) +
  geom_col() +
  facet_grid(. ~ borough) +
  labs(
    title = "Frequency of Violation Type in Each Borough",
    # labs() names axes via the aesthetics `x` / `y`; `xlab` / `ylab` are not
    # recognised there, so the axis labels were never applied.
    x = "Violation Type",
    y = "Number of tickets"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
    legend.text = element_text(size = 8)
  )

ggplotly(violation_type)